# -*- coding: utf-8 -*-
import random
import scrapy
from henan.rules import *
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.client.mongo_client import Mongo
from zc_core.model.items import Box
from zc_core.util.http_util import retry_request
from zc_core.spiders.base import BaseSpider



class GroupSpider(BaseSpider):
    """Spider that collects SPU grouping data for pooled SKUs lacking a ``spuId``.

    Flow, as implemented below:

    1. ``start_requests`` requests the index page once.
    2. ``request_item`` reads ``item_data_pool`` for documents without a
       ``spuId`` (one representative per ``skuName``) and requests each
       product detail page.
    3. ``parse_item_data`` emits the parsed item and requests page 1 of the
       "same model" listing for its SPU.
    4. ``parse_same_list`` / ``parse_more_same_list`` emit the SKUs and items
       found on each listing page.

    NOTE(review): ``self.batch_no`` and ``self.error_back`` are not defined in
    this class; presumably they are provided by ``BaseSpider`` -- confirm.
    """

    name = 'group'
    # Frequently used URLs
    index_url = 'http://222.143.21.205:8081/?area='
    item_url = 'http://222.143.21.205:8081/product/detail/{}'
    same_list_url = 'http://222.143.21.205:8081/product/qtsj?xhbh={}&pmbh={}&pagenow={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Forward ``batchNo`` and any other spider arguments to the base class."""
        super(GroupSpider, self).__init__(batchNo=batchNo, *args, **kwargs)

    def start_requests(self):
        """Yield the single index-page request that kicks off the crawl."""
        yield Request(
            url=self.index_url,
            callback=self.request_item,
            errback=self.error_back,
            dont_filter=True
        )

    def _same_list_request(self, spu_id, cat3_id, decode_cat3_id, page, callback):
        """Build the request for one page of the same-model listing of ``spu_id``."""
        return Request(
            url=self.same_list_url.format(spu_id, decode_cat3_id, page),
            callback=callback,
            errback=self.error_back,
            meta={
                'reqType': 'item',
                'batchNo': self.batch_no,
                'spuId': spu_id,
                'catalog3Id': cat3_id,
                'decodeCatalog3Id': decode_cat3_id,
                'page': page,
            },
        )

    @staticmethod
    def _page_count(total_page):
        """Normalise a parsed page count to a non-negative int (0 if unusable)."""
        try:
            return max(int(total_page or 0), 0)
        except (TypeError, ValueError):
            return 0

    def request_item(self, response):
        """Request the detail page of one SKU per distinct ``skuName``.

        Only pool documents without a ``spuId`` are considered. Records with
        no ``linkId`` or no ``catalog3Id`` are skipped: the former cannot form
        a detail URL, and the latter would be discarded by
        ``parse_item_data`` anyway.
        """
        pipeline = [
            {"$match": {'spuId': {'$exists': False}}},
            {"$group": {
                "_id": "$skuName",
                "linkId": {"$first": "$$ROOT.linkId"},
                "skuId": {"$first": "$$ROOT._id"},
                "catalog3Id": {"$first": "$$ROOT.catalog3Id"},
            }},
            {"$project": {"linkId": 1, "skuId": 1, "catalog3Id": 1}},
        ]
        # Materialise the result so len()/shuffle work whether the client
        # returns a list or an iterable, and treat a None result as empty.
        dist_sku_list = list(Mongo().aggregate('item_data_pool', pipeline=pipeline) or [])
        self.logger.info('目标：%s', len(dist_sku_list))
        random.shuffle(dist_sku_list)
        for sku in dist_sku_list:
            sku_id = sku.get('skuId')
            link_id = sku.get('linkId')
            cat3_id = sku.get('catalog3Id')
            if not link_id or not cat3_id:
                # One incomplete record must not abort the whole generator.
                self.logger.warning('skip sku without linkId/catalog3Id: %s', sku_id)
                continue
            decode_cat3_id = decode_base64(cat3_id)

            # Fetch the product detail page
            yield Request(
                url=self.item_url.format(link_id),
                callback=self.parse_item_data,
                errback=self.error_back,
                meta={
                    'reqType': 'item',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                    'catalog3Id': cat3_id,
                    'decodeCatalog3Id': decode_cat3_id,
                },
            )

    def parse_item_data(self, response):
        """Emit the parsed item and request page 1 of its same-model listing.

        Nothing is emitted unless the parsed item has a ``spuId`` and the
        request meta carries a ``catalog3Id``.
        """
        meta = response.meta
        cat3_id = meta.get('catalog3Id')
        decode_cat3_id = meta.get('decodeCatalog3Id')
        # Resolves to the module-level parser of the same name, not this method.
        item = parse_item_data(response)
        if not (item and item.get('spuId') and cat3_id):
            return

        self.logger.info('商品: [%s]', item.get('skuId'))
        yield item

        # Collect the related (same-model) products, starting at page 1
        yield self._same_list_request(
            item.get('spuId'), cat3_id, decode_cat3_id, 1, self.parse_same_list)

    def parse_same_list(self, response):
        """Handle page 1 of a same-model listing and fan out to later pages."""
        meta = response.meta
        spu_id = meta.get('spuId')
        cat3_id = meta.get('catalog3Id')
        decode_cat3_id = meta.get('decodeCatalog3Id')
        curr_page = meta.get('page')
        # Resolves to the module-level parser of the same name, not this method.
        skus, items, total_page = parse_same_list(response)
        if skus and items:
            yield Box('sku', self.batch_no, skus)
            yield Box('item', self.batch_no, items)
            self.logger.info('同款1: spu=%s, page=%s, cnt=%s', spu_id, curr_page, len(skus))

        # Remaining pages; a missing or unparsable page count means none.
        for page in range(2, self._page_count(total_page) + 1):
            yield self._same_list_request(
                spu_id, cat3_id, decode_cat3_id, page, self.parse_more_same_list)

    def parse_more_same_list(self, response):
        """Handle pages 2..N of a same-model listing; no further fan-out."""
        meta = response.meta
        spu_id = meta.get('spuId')
        curr_page = meta.get('page')
        skus, items, _ = parse_same_list(response)
        if skus and items:
            yield Box('sku', self.batch_no, skus)
            yield Box('item', self.batch_no, items)
            self.logger.info('同款2: spu=%s, page=%s, cnt=%s', spu_id, curr_page, len(skus))
